In [1]:
import pymc3 as pm
import numpy
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from scipy import stats
sns.set(font_scale=1.5)

%matplotlib inline

Coin flip: learn things along the way — what can be observed to get the right answer?

  1. Build a dataset from a contrived distribution where we will know if we get the right answer
  2. Build that in Pymc3
  3. Experiment with different observation combinations to see how to get the right answer.

Refs:

Example on an unfair 6 sided die


In [2]:
# 50 rolls of a loaded 6-sided die: face 1 came up 20 times, faces 2-6 six times each.
observations = np.array([20, 6, 6, 6, 6, 6])
with pm.Model():
    probs = pm.Dirichlet('probs', a=np.ones(6))  # flat prior over the six face probabilities
    # n must equal the total number of observed rolls; derive it from the data
    # instead of hard-coding 50 so the two stay in sync.
    rolls = pm.Multinomial('rolls', n=observations.sum(), p=probs, observed=observations)
    # random_seed makes the stochastic sampling reproducible across re-runs.
    trace = pm.sample(5000, random_seed=42)
pm.plot_posterior(trace);


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [probs]
Sampling 2 chains, 0 divergences: 100%|██████████| 11000/11000 [00:06<00:00, 1741.12draws/s]

In [3]:
pm.traceplot(trace)


Out[3]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x12624f890>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1262fe950>]],
      dtype=object)

Now a 4-sided die, and pull out hierarchical info

Get the right answer plainly from the data


In [4]:
# fair fwould be all the same, we want different
# 1/4 = 0.25, so we want [10%, 20%, 30%, 40%]
observations = np.array([50*.1, 50*.2, 50*.3 ,50*.4])
with pm.Model():
    probs = pm.Dirichlet('probs', a=np.ones(4))  # flat prior
    rolls = pm.Multinomial('rolls', n=observations.sum(), p=probs, observed=observations)
    trace = pm.sample(5000)
# pm.traceplot(trace)
pm.plot_posterior(trace);


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [probs]
Sampling 2 chains, 0 divergences: 100%|██████████| 11000/11000 [00:05<00:00, 2191.48draws/s]

Build the whole thing out with the Bernoulli dists


In [5]:
# this is the number of 1,2,3,4
N_rolls = 50
observations = np.array([N_rolls*.1, N_rolls*.2, N_rolls*.3 ,N_rolls*.4])
# so the data for even and odd is, even = 1
obs_evenodd = [1]*observations[np.asarray([1, 3])].sum().astype(int) + [0]*observations[np.asarray([0, 2])].sum().astype(int)
# make then obs_2_4 from even, 2=True
obs_2_4 = [1]*observations[1].astype(int) + [0]*observations[3].astype(int)
# make then obs_1_3 from even, 1=True
obs_1_3 = [1]*observations[0].astype(int) + [0]*observations[2].astype(int)

In [6]:
with pm.Model() as our_first_model:
    p_evenodd = pm.Beta('p_evenodd', alpha=1, beta=1)
    evenodd = pm.Bernoulli('evenodd', p=p_evenodd, observed=obs_evenodd)
    
    p_2_4 = pm.Beta('p_2_4', alpha=1, beta=1)
    b_2_4 = pm.Bernoulli('b_2_4', p=p_2_4, observed=obs_2_4)
    
    p_1_3 = pm.Beta('p_1_3', alpha=1, beta=1)
    b_1_3 = pm.Bernoulli('b_1_3', p=p_1_3, observed=obs_1_3)
    
    
    p1 = pm.Deterministic('p1', (1-p_evenodd)*p_1_3)
    p2 = pm.Deterministic('p2', (p_evenodd)*p_2_4)
    p3 = pm.Deterministic('p3', (1-p_evenodd)*(1-p_1_3))
    p4 = pm.Deterministic('p4', (p_evenodd)*(1-p_1_3))
    
    trace = pm.sample(5000,)

pm.traceplot(trace, var_names=['p1', 'p2', 'p3', 'p4'])
pm.plot_posterior(trace, var_names=['p1', 'p2', 'p3', 'p4']);


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [p_1_3, p_2_4, p_evenodd]
Sampling 2 chains, 0 divergences: 100%|██████████| 11000/11000 [00:06<00:00, 1823.94draws/s]

In [ ]:


In [ ]:


In [ ]:


In [ ]: